{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "! pip install -U pip\n",
    "! pip install -U torch==1.5.1\n",
    "! pip install -U torchaudio==0.5.1\n",
    "! pip install -U matplotlib==3.2.1\n",
    "! pip install -U clearml>=0.16.1\n",
    "! pip install -U tensorboard==2.2.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import torch\n",
    "import torchaudio\n",
    "from torch.utils.tensorboard import SummaryWriter\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from clearml import Task\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "task = Task.init(project_name='Audio Example', task_name='data pre-processing')\n",
    "configuration_dict = {'number_of_samples': 3}\n",
    "configuration_dict = task.connect(configuration_dict)  # enabling configuration override by clearml\n",
    "print(configuration_dict)  # printing actual configuration (after override in remote mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tensorboard_writer = SummaryWriter('./tensorboard_logs')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "if not os.path.isdir('./data'):\n",
    "    os.mkdir('./data')\n",
    "yesno_data = torchaudio.datasets.YESNO('./data', download=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_signal(signal, title, cmap=None):\n",
    "    plt.figure()\n",
    "    if signal.ndim == 1:\n",
    "        plt.plot(signal)\n",
    "    else:\n",
    "        plt.imshow(signal, cmap=cmap)    \n",
    "    plt.title(title)\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "fixed_sample_rate = 22050\n",
    "for n in range(configuration_dict.get('number_of_samples', 3)):\n",
    "    audio, sample_rate, labels = yesno_data[n]\n",
    "    tensorboard_writer.add_audio('Audio samples/{}'.format(n), audio, n, sample_rate)\n",
    "    \n",
    "    resample_transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=fixed_sample_rate)\n",
    "    melspectogram_transform = torchaudio.transforms.MelSpectrogram(sample_rate=fixed_sample_rate, n_mels=128)\n",
    "    \n",
    "    audio_mono = torch.mean(resample_transform(audio), dim=0, keepdim=True)\n",
    "    plot_signal(audio_mono[0,:], 'Original waveform')\n",
    "    \n",
    "    melspectogram = melspectogram_transform(audio_mono)\n",
    "    plot_signal(melspectogram.squeeze().numpy(), 'Mel spectogram', 'hot')\n",
    "    plot_signal(torchaudio.transforms.AmplitudeToDB()(melspectogram).squeeze().numpy(), 'Mel spectogram DB', 'hot')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
